// Copyright 2024 The Google Research Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Information about aptamer sequences.

syntax = "proto3";

package xxx.aptamers;

// Per-aptamer record: the aptamer's sequence and primers, the cluster it
// belongs to, and its per-measurement sequencing counts and microarray values.
message Aptamer {
  // The sequence of the non-primer regions.
  // This field is left empty when the sequence is identical to
  // cluster_sequence below, in which case cluster_sequence holds the
  // sequence. Since most sequences are in a cluster by themselves, this
  // saves a lot of room in the output.
  string sequence = 1;

  // The forward primer sequence.
  string forward_primer = 2;

  // The reverse primer sequence.
  // It is given on the same strand as forward_primer and sequence, i.e. the
  // full aptamer sequence is forward_primer + sequence + reverse_primer.
  // If you were doing PCR you would order the reverse complement of this
  // reverse primer sequence.
  string reverse_primer = 3;

  // The ID of the cluster this aptamer belongs to.
  // NOTE(review): presumably the same ID space as Cluster.cluster_id and
  // SequenceClusterLink.cluster_id -- confirm.
  int64 cluster_id = 4;

  // The sequence chosen to represent the cluster (generally the sequence
  // with the most counts).
  string cluster_sequence = 5;

  // A map from measurement_id to the integer count for that sequenced pool.
  // See the experiment pbtxt metadata for details on the measurement_ids.
  // Only measurement_ids with non-zero counts are included here.
  // This map can be empty for sequences never seen in the experiment (i.e.
  // those sequences invented for a microarray).
  // Map iteration order is undefined; do not rely on it. Counts are int32,
  // so a single count cannot exceed 2^31 - 1.
  map<int32, int32> counts = 6;

  // A map from measurement_id to the float microarray value.
  // See the experiment pbtxt metadata for details on the measurement_ids.
  // Only measurement_ids whose microarray included this sequence are present
  // here.
  // This map will be empty for sequences found only in the count data and
  // never tested on a microarray.
  // Map iteration order is undefined; do not rely on it.
  map<int32, float> microarrays = 7;
}

// A cluster, i.e. a group in which multiple related aptamers may be placed.
// This message currently carries only the cluster's identifier.
message Cluster {
  // The unique ID of the cluster.
  int64 cluster_id = 1;
}

// Links a single aptamer sequence to the cluster it is assigned to.
// This is a convenience message used for serially reading and writing
// RecordIO.
message SequenceClusterLink {
  // The sequence of the aptamer of interest.
  // NOTE(review): presumably the non-primer sequence, as in
  // Aptamer.sequence -- confirm.
  string sequence = 1;

  // The unique ID of the cluster the aptamer is assigned to.
  int64 cluster_id = 2;
}
